import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import plotly.express as px
import plotly.graph_objects as go
# Render floats with no decimal places so population counts display as whole numbers.
pd.options.display.float_format = '{:.0f}'.format
# Load the population-estimates table (path is relative to the working directory).
df = pd.read_csv("nst-est2019-alldata.csv")
# Preview the first rows.
df.head()
| SUMLEV | REGION | DIVISION | STATE | NAME | CENSUS2010POP | ESTIMATESBASE2010 | POPESTIMATE2010 | POPESTIMATE2011 | POPESTIMATE2012 | ... | RDOMESTICMIG2019 | RNETMIG2011 | RNETMIG2012 | RNETMIG2013 | RNETMIG2014 | RNETMIG2015 | RNETMIG2016 | RNETMIG2017 | RNETMIG2018 | RNETMIG2019 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 10 | 0 | 0 | 0 | United States | 308745538 | 308758105 | 309321666 | 311556874 | 313830990 | ... | 0 | 2 | 3 | 3 | 3 | 3 | 3 | 3 | 2 | 2 |
| 1 | 20 | 1 | 0 | 0 | Northeast Region | 55317240 | 55318443 | 55380134 | 55604223 | 55775216 | ... | -5 | 1 | -0 | -0 | -1 | -2 | -2 | -2 | -2 | -3 |
| 2 | 20 | 2 | 0 | 0 | Midwest Region | 66927001 | 66929725 | 66974416 | 67157800 | 67336743 | ... | -2 | -1 | -1 | -0 | -1 | -1 | -1 | -1 | -1 | -1 |
| 3 | 20 | 3 | 0 | 0 | South Region | 114555744 | 114563030 | 114866680 | 116006522 | 117241208 | ... | 3 | 5 | 6 | 5 | 6 | 7 | 7 | 6 | 5 | 5 |
| 4 | 20 | 4 | 0 | 0 | West Region | 71945553 | 71946907 | 72100436 | 72788329 | 73477823 | ... | 1 | 3 | 3 | 3 | 4 | 5 | 5 | 4 | 3 | 2 |
5 rows × 151 columns
After taking an initial look at the data, we can see that there are quite a few extraneous rows & columns that we do not need for our analysis.
Since we are trying to predict the population in 2020 (and then using that information to figure out how many electoral votes each state is going to get for the 2024 and 2028 elections), we'll only keep columns that contain population information for 2010 and 2019
# Trim the raw table: discard every column from position 27 onward, then the
# first remaining column.
df = df.drop(columns=df.columns[27:])
df = df.drop(columns=df.columns[0])
# Keep only the state name and the population columns needed for apportionment.
df_house = df[['NAME', 'CENSUS2010POP', 'POPESTIMATE2010', 'POPESTIMATE2019']]
# Remove rows by label: 0-4 are the national and regional aggregates shown in
# the preview above; 13 and 56 are presumably D.C. and Puerto Rico (verify
# against the CSV), leaving the 50 states.
df_house = df_house.drop([0, 1, 2, 3, 4, 13, 56])
# Renumber the remaining rows 0..n-1 so they can be addressed by position.
df_house = df_house.reset_index(drop=True)
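Now we will predict the population for each state using the exponential growth formula \begin{equation*}A=Pe^{rt}\end{equation*}
where \begin{equation*}t = 1\end{equation*} (one year after the July 1st, 2019 estimate),
\begin{equation*}r = \frac{1}{9}\left(\frac{pop_{2019}}{pop_{2010}} - 1\right)\end{equation*} \begin{equation*}P = pop_{2019}\end{equation*}We will also account for the fact that the population estimates are as of July 1st of that year, whereas the Census count is based on April 1st numbers: Census Day 2020 falls only 9 months after the July 1st, 2019 estimate, so for the Census prediction we use \begin{equation*}t = \frac{9}{12}\end{equation*}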
# Project each state's 2020 population with A = P * e^(r * t), where P is the
# July 1, 2019 estimate and r is the average yearly growth between the 2010 and
# 2019 estimates. Column-wise arithmetic replaces the row-by-row loop and the
# 50-row placeholder columns, so the cell no longer depends on the row count.
# NOTE: r is the simple average growth per year, (pop_2019 / pop_2010 - 1) / 9,
# not the continuously compounded ln(pop_2019 / pop_2010) / 9.
annual_rate = (df_house['POPESTIMATE2019'] / df_house['POPESTIMATE2010'] - 1) / 9
# The Census counts people as of April 1, i.e. 9 of the 12 months after the
# July 1, 2019 estimate, so only 9/12 of a year's growth is applied.
census_rate = annual_rate * 9 / 12

# t = 1: estimate for July 1, 2020
df_house['POPESTIMATE2020'] = df_house['POPESTIMATE2019'] * np.exp(annual_rate)
# t = 9/12: predicted Census count for April 1, 2020
df_house['CENSUS2020POP'] = df_house['POPESTIMATE2019'] * np.exp(census_rate)
df_house.head()
| NAME | CENSUS2010POP | POPESTIMATE2010 | POPESTIMATE2019 | POPESTIMATE2020 | CENSUS2020POP | |
|---|---|---|---|---|---|---|
| 0 | Alabama | 4779736 | 4785437 | 4903185 | 4916608 | 4913249 |
| 1 | Alaska | 710231 | 713910 | 731545 | 733556 | 733052 |
| 2 | Arizona | 6392017 | 6407172 | 7278717 | 7389563 | 7361695 |
| 3 | Arkansas | 2915918 | 2921964 | 3017804 | 3028822 | 3026064 |
| 4 | California | 37253956 | 37319502 | 39512223 | 39771017 | 39706160 |
Now that we have our 2020 Census Population estimate, we will use the Huntington-Hill method to allocate votes to each state. Each state starts off with one vote and the next state to receive a vote is the state with the highest priority number. The priority number of the nth state is equal to \begin{equation*}\frac{P_n}{\sqrt{v(v+1)}}\end{equation*}
where $P_n$ is the population of that state and $v$ is the number of votes that the state currently has.
After every round (where one vote has been allocated), the state which received the vote has its priority number recalculated. We repeat this until all 435 votes have been allocated (435 = 538 electoral votes - 100 Senate Votes - 3 votes for D.C.); since each of the 50 states starts off with one vote, this takes 435 - 50 = 385 rounds.
If this number looks familiar, you are right; it is the number of seats in the House of Representatives (we are really just allocating House seats here). To test our code, we will apportion votes based on 2010 Census data and compare that to the actual allocation of votes that took place
# Seat counts per state: every state is guaranteed one House seat up front.
seats_2010 = [1] * 50
seats_2020 = [1] * 50
# Seats still to be handed out after the guaranteed ones.
seats_left = 435 - 50

# Starting priority numbers: population / sqrt(v * (v + 1)) with v = 1 seat.
priority_2010 = [
    df_house.loc[state, "CENSUS2010POP"] / math.sqrt(1 * (1 + 1))
    for state in range(50)
]
priority_2020 = [
    df_house.loc[state, "CENSUS2020POP"] / math.sqrt(1 * (1 + 1))
    for state in range(50)
]
def allocate(df, seats, priority, year):
    """Run one Huntington-Hill round: give one seat to the top-priority state.

    The state with the largest priority number (the first one on ties) gains a
    seat, and its priority number is recomputed as
    population / sqrt(v * (v + 1)), where v is its new seat count.

    Args:
        df: DataFrame whose row labels are the positions used in ``seats`` and
            ``priority`` and which has a ``CENSUS<year>POP`` column.
        seats: list of current seat counts per state; modified in place.
        priority: list of current priority numbers per state; modified in place.
        year: census year; selects the population column ``CENSUS<year>POP``
            (e.g. 2010 -> ``CENSUS2010POP``).

    Raises:
        KeyError: if ``df`` has no population column for ``year``. Previously
            any year other than 2010 silently used the 2020 column.
    """
    column = "CENSUS{}POP".format(year)
    if column not in df.columns:
        raise KeyError("no population column {!r} for year {}".format(column, year))

    # find index of biggest priority number and increment the seat count there
    winner = priority.index(max(priority))
    seats[winner] += 1

    # recalculate the priority number for the state that just gained a seat
    held = seats[winner]
    priority[winner] = df.loc[winner, column] / math.sqrt(held * (held + 1))
After we have allocated all the votes, we will add 2 to each state's vote count (# of Senators) and find the difference between its 2010 and 2020 vote totals
# Hand out the remaining House seats one per round, for both census years.
for _ in range(seats_left):
    allocate(df_house, seats_2010, priority_2010, 2010)
    allocate(df_house, seats_2020, priority_2020, 2020)

# Electoral votes = House seats + 2 Senate seats.
seats_2010 = [house + 2 for house in seats_2010]
seats_2020 = [house + 2 for house in seats_2020]

# Change in electoral votes from the 2010 to the 2020 apportionment.
seats_diff = [after - before for before, after in zip(seats_2010, seats_2020)]
Now let's create a new data frame with vote counts for each state. Then we will add each state's 2 letter code.
# Build the electoral-college table: state name plus the vote counts computed
# above (aligned with df_house by row label).
df_ec = df_house['NAME'].to_frame().assign(
    Seats2010=pd.Series(seats_2010),
    Seats2020=pd.Series(seats_2020),
    SeatsChange=pd.Series(seats_diff),
)
df_ec.head()
| NAME | Seats2010 | Seats2020 | SeatsChange | |
|---|---|---|---|---|
| 0 | Alabama | 9 | 8 | -1 |
| 1 | Alaska | 3 | 3 | 0 |
| 2 | Arizona | 11 | 12 | 1 |
| 3 | Arkansas | 6 | 6 | 0 |
| 4 | California | 55 | 54 | -1 |
# Attach each state's 2 letter code.
state_codes = pd.read_csv("StateCode.csv")
# Rows dropped by label; presumably the non-state entries (D.C., territories)
# in StateCode.csv -- verify against the file.
state_codes = state_codes.drop(index=[2, 9, 10, 13, 23, 39, 43, 45, 53])
# Renumber so the remaining codes line up with df_ec's row labels.
state_codes = state_codes.reset_index(drop=True)
df_ec['Code'] = state_codes['Code']
We will use the plotly graph object package to create a visual for our data
# One US choropleth per metric:
# (df_ec column, colorbar title, colorscale, figure title)
for column, colorbar_title, colorscale, title in [
    ('Seats2010', "EC Votes", "Reds", 'Electoral Votes by State After 2010 Census'),
    ('Seats2020', "EC Votes", "Reds", 'Predicted Electoral Votes by State After 2020 Census'),
    ('SeatsChange', "Change", "RdBu", 'Predicted Change in Electoral Votes After 2020 Census'),
]:
    state_layer = go.Choropleth(
        locations=df_ec['Code'],
        z=df_ec[column],
        locationmode='USA-states',
        colorbar_title=colorbar_title,
        colorscale=colorscale,
        text=df_ec['NAME'].astype(str),
    )
    plot = go.Figure(data=state_layer)
    plot.update_layout(title_text=title, geo_scope='usa')
    plot.show()
Now we will look at some election results from the last 50 years using the Plotly Express Choropleth package
# read in data set and drop unneeded columns and DC data
df_elec_col = pd.read_csv("Electoral_College.csv")
df_elec_col = df_elec_col.drop(df_elec_col.columns[4:], axis=1)
# .copy() so the 'Code' column added below is written to an independent frame
# rather than to a filtered selection of the original.
df_elec_col = df_elec_col[df_elec_col['State'] != "D.C."].copy()

# add 2 letter state codes
# The mapping pairs state names with codes purely by position: the i-th distinct
# state (in order of first appearance in the file) gets the i-th code in df_ec.
# NOTE(review): this is only correct if both sequences are in the same state
# order -- confirm against Electoral_College.csv.
state_names = list(df_elec_col['State'].unique())
state_code_values = list(df_ec['Code'].values)
if len(state_names) != len(state_code_values):
    # zip() would silently truncate and leave some states without a code
    raise ValueError(
        "found {} distinct states but {} state codes".format(
            len(state_names), len(state_code_values)))
map_state = dict(zip(state_names, state_code_values))
# One vectorised lookup instead of a placeholder column plus a row-by-row loop.
df_elec_col['Code'] = df_elec_col['State'].map(map_state)
# Keep only the elections held from 1970 onward.
df_elec_col_1970 = df_elec_col.loc[df_elec_col['Year'] >= 1970]

# Fill color for each party label in the 'Party' column.
party_colors = {'D': "Blue", 'R': "Red"}

# Animated map with one frame per election year.
px.choropleth(
    data_frame=df_elec_col_1970,
    locations='Code',
    locationmode='USA-states',
    color='Party',
    color_discrete_map=party_colors,
    animation_frame='Year',
    scope='usa',
    title="Election Results (1972 - 2020)",
)
# Hover box: the vote count is the headline (hover_name); of the remaining
# fields only the state name is shown.
hover = {'Year': False, 'State': True, 'Code': False, 'Votes': False}
px.choropleth(
    data_frame=df_elec_col,
    locations='Code',
    locationmode='USA-states',
    color='Votes',
    color_continuous_scale="Reds",
    hover_name='Votes',
    hover_data=hover,
    animation_frame='Year',
    scope='usa',
    title="Electoral Votes Over Time",
)
Now you may have noticed that the votes in the Electoral College are not allocated perfectly according to each state's population. Each state automatically starts off with 3 votes (2 Senate Votes + at least 1 House Seat). This is one point of criticism frequently made by detractors of the Electoral College. It undervalues the people living in states like California, Texas, New York, and Florida, while overrepresenting the people in Wyoming, Vermont, North Dakota, and Alaska.
To understand this disparity and get a visual of how bad it is, we will look at Census Data from 1960 all the way to our previously predicted 2020 figures.
Our first step is to load in the data set, get rid of the D.C. data, add 2 letter state codes, and add our predicted population numbers for 2020 to the data set.
df_census = pd.read_csv('census.csv')
# State names in this file carry leading whitespace (the D.C. row reads
# " District of Columbia"). Strip surrounding whitespace instead of blindly
# cutting the first character, which would corrupt any name without the space.
df_census['State'] = df_census['State'].str.strip()
# Drop D.C.; .copy() so the new 'Code' column is written to an independent frame.
df_census = df_census[df_census['State'] != "District of Columbia"].copy()

# Add the 2 letter state codes with one vectorised lookup.
df_census['Code'] = df_census['State'].map(map_state)
unmapped = df_census.loc[df_census['Code'].isna(), 'State'].unique()
if len(unmapped) > 0:
    # keep failing loudly, as the per-row dictionary lookup did
    raise KeyError("no state code for: {}".format(", ".join(map(str, unmapped))))

# Predicted 2020 Census populations, in the same column layout as df_census.
# A fresh frame is built instead of inserting columns into a slice of df_house.
df_temp_2020 = pd.DataFrame({
    'Year': 2020,
    'State': df_house['NAME'],
    'Population': df_house['CENSUS2020POP'],
    'Code': df_ec['Code'],
})
df_census = pd.concat([df_census, df_temp_2020], axis=0)
df_census.reset_index(drop=True, inplace=True)
Now we will do what we did earlier, allocating electoral votes to each state and doing this for every 10-year period. However, there is a catch: we will do this the normal way (each state's electoral votes being equal to the number of its Representatives + its number of Senators) and in a truly proportional way (allocate 537 votes among the 50 states).
Why 537? 538 total votes - 1 vote for D.C. (based on its population size)
# State names in map_state's insertion order; allocate2 uses a state's position
# in this list to translate a seat/priority index into a state name.
states = list(map_state)
def allocate2(df, seats, priority, year, state_names=None):
    """Run one Huntington-Hill round using a long-format census table.

    The state with the largest priority number (the first one on ties) gains a
    seat, and its priority number is recomputed as
    population / sqrt(v * (v + 1)), where v is its new seat count.

    Args:
        df: DataFrame with 'Year', 'State' and 'Population' columns.
        seats: list of current seat counts per state; modified in place.
        priority: list of current priority numbers per state; modified in place.
        year: census year whose population row is used.
        state_names: state names in the same order as ``seats``/``priority``.
            Defaults to the module-level ``states`` list.

    Raises:
        ValueError: if ``df`` does not hold exactly one row for the winning
            state in ``year``. ``seats`` and ``priority`` are left untouched in
            that case.
    """
    if state_names is None:
        state_names = states

    # find index of state with biggest priority number
    winner = priority.index(max(priority))
    state = state_names[winner]

    # look the population up before mutating anything, so a failed lookup
    # cannot leave a seat awarded without a refreshed priority number
    matches = df.loc[(df['Year'] == year) & (df['State'] == state), 'Population']
    if len(matches) != 1:
        raise ValueError(
            "expected exactly one population row for {} in {}, found {}".format(
                state, year, len(matches)))
    # .iloc[0] extracts the scalar explicitly; float() on a whole Series is
    # deprecated in recent pandas
    population = float(matches.iloc[0])

    # award the seat and recalculate the priority number for that state
    seats[winner] += 1
    held = seats[winner]
    priority[winner] = population / math.sqrt(held * (held + 1))
# Electoral votes under the current rules for every census year in df_census:
# apportion 435 House seats with the Huntington-Hill method, then add the 2
# Senate votes each state receives. One loop over the census years replaces
# seven hand-copied sets of lists, and the number of states comes from the data
# instead of a hard-coded 50.
census_years = (1960, 1970, 1980, 1990, 2000, 2010, 2020)
house_seats = 435
senate_seats = 2

votes_a = pd.Series(0, index=df_census.index)
for census_year in census_years:
    in_year = df_census['Year'] == census_year
    # every state starts with one seat, so its first priority number is
    # population / sqrt(1 * 2)
    seats = [1] * int(in_year.sum())
    priority = [population / math.sqrt(2)
                for population in df_census.loc[in_year, 'Population']]
    # allocate the remaining seats; allocate2 turns a list position into a
    # state name through the module-level `states` list, so the rows of each
    # year must appear in that same state order
    for _ in range(house_seats - len(seats)):
        allocate2(df_census, seats, priority, census_year)
    # write the results onto this year's rows only, rather than relying on the
    # whole frame being sorted by year in 50-row chunks
    votes_a.loc[in_year] = [house + senate_seats for house in seats]

# add a column to the data frame that stores each state's electoral votes for the 10-yr period after that Census
df_census['VotesA'] = votes_a
df_census.head()
| Year | State | Population | Code | VotesA | |
|---|---|---|---|---|---|
| 0 | 1960 | Alabama | 3266740 | AL | 10 |
| 1 | 1960 | Alaska | 226167 | AK | 3 |
| 2 | 1960 | Arizona | 1302161 | AZ | 5 |
| 3 | 1960 | Arkansas | 1786272 | AR | 6 |
| 4 | 1960 | California | 15717204 | CA | 40 |
Now we will do the exact same process using a true proportional allocation
# Electoral votes under a purely proportional scheme: all 537 votes (538 minus
# the 1 reserved for D.C.) are apportioned with the Huntington-Hill method and
# no Senate bonus is added. One loop over the census years replaces seven
# hand-copied sets of lists, and the number of states comes from the data
# instead of a hard-coded 50.
census_years = (1960, 1970, 1980, 1990, 2000, 2010, 2020)
proportional_votes = 537

votes_p = pd.Series(0, index=df_census.index)
for census_year in census_years:
    in_year = df_census['Year'] == census_year
    # every state starts with one vote, so its first priority number is
    # population / sqrt(1 * 2)
    seats = [1] * int(in_year.sum())
    priority = [population / math.sqrt(2)
                for population in df_census.loc[in_year, 'Population']]
    # allocate2 turns a list position into a state name through the
    # module-level `states` list, so the rows of each year must appear in that
    # same state order
    for _ in range(proportional_votes - len(seats)):
        allocate2(df_census, seats, priority, census_year)
    # write the results onto this year's rows only, rather than relying on the
    # whole frame being sorted by year in 50-row chunks
    votes_p.loc[in_year] = seats

df_census['VotesP'] = votes_p
df_census.head()
| Year | State | Population | Code | VotesA | VotesP | |
|---|---|---|---|---|---|---|
| 0 | 1960 | Alabama | 3266740 | AL | 10 | 10 |
| 1 | 1960 | Alaska | 226167 | AK | 3 | 1 |
| 2 | 1960 | Arizona | 1302161 | AZ | 5 | 4 |
| 3 | 1960 | Arkansas | 1786272 | AR | 6 | 5 |
| 4 | 1960 | California | 15717204 | CA | 40 | 47 |
To see the disparity between the largest and smallest states, we will calculate the population per electoral vote for each row.
# People represented by one electoral vote, rounded to the nearest whole
# person, under the actual (A) and proportional (P) allocations. Whole-column
# division replaces the row-by-row loop; the resulting columns are floats, as
# before.
df_census['PopPerVoteA'] = (df_census['Population'] / df_census['VotesA']).round()
df_census['PopPerVoteP'] = (df_census['Population'] / df_census['VotesP']).round()
df_census.head()
| Year | State | Population | Code | VotesA | VotesP | PopPerVoteA | PopPerVoteP | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1960 | Alabama | 3266740 | AL | 10 | 10 | 326674 | 326674 |
| 1 | 1960 | Alaska | 226167 | AK | 3 | 1 | 75389 | 226167 |
| 2 | 1960 | Arizona | 1302161 | AZ | 5 | 4 | 260432 | 325540 |
| 3 | 1960 | Arkansas | 1786272 | AR | 6 | 5 | 297712 | 357254 |
| 4 | 1960 | California | 15717204 | CA | 40 | 47 | 392930 | 334409 |
To see how many votes each state is missing, we will take the difference of each state's actual electoral votes and its votes if they were allocated perfectly proportionally.
# Actual minus proportional electoral votes: positive means the state holds
# more votes than its population alone would earn. Whole-column subtraction
# replaces the row-by-row loop; the column is kept as float, as before.
df_census['Diff'] = (df_census['VotesA'] - df_census['VotesP']).astype(float)
df_census.head()
| Year | State | Population | Code | VotesA | VotesP | PopPerVoteA | PopPerVoteP | Diff | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1960 | Alabama | 3266740 | AL | 10 | 10 | 326674 | 326674 | 0 |
| 1 | 1960 | Alaska | 226167 | AK | 3 | 1 | 75389 | 226167 | 2 |
| 2 | 1960 | Arizona | 1302161 | AZ | 5 | 4 | 260432 | 325540 | 1 |
| 3 | 1960 | Arkansas | 1786272 | AR | 6 | 5 | 297712 | 357254 | 1 |
| 4 | 1960 | California | 15717204 | CA | 40 | 47 | 392930 | 334409 | -7 |
Now we will plot the difference in votes for each state over many decades
# Hover box: the plotted value is the headline (hover_name); of the remaining
# columns only the state name is shown.
hover = {
    column: column == 'State'
    for column in ('Year', 'State', 'Code', 'VotesA', 'VotesP', 'Population',
                   'PopPerVoteA', 'PopPerVoteP', 'Diff')
}
px.choropleth(
    data_frame=df_census,
    locations='Code',
    locationmode='USA-states',
    color='Diff',
    color_continuous_scale="RdYlGn",
    hover_name='Diff',
    hover_data=hover,
    animation_frame='Year',
    scope='usa',
    title="Actual Votes - Proportional Votes",
)
To get a better picture of how the Electoral College underrepresents some states and overrepresents others, let's create a visual that shows how many people are represented by one electoral college vote in each state.
# Hover box: the plotted value is the headline (hover_name); of the remaining
# columns only the state name is shown.
hover = {
    column: column == 'State'
    for column in ('Year', 'State', 'Code', 'VotesA', 'VotesP', 'Population',
                   'PopPerVoteA', 'PopPerVoteP', 'Diff')
}
px.choropleth(
    data_frame=df_census,
    locations='Code',
    locationmode='USA-states',
    color='PopPerVoteA',
    color_continuous_scale="RdYlGn",
    hover_name='PopPerVoteA',
    hover_data=hover,
    animation_frame='Year',
    scope='usa',
    title="Population per Actual Electoral Vote",
)
Now let's do the same thing we did in the previous part, but this time, we will use a state's theoretical votes (if they were allocated perfectly proportionally). As you can expect, the disparity between a state like California and a state like Wyoming will not be as big in this scenario.
# Hover box: the plotted value is the headline (hover_name); of the remaining
# columns only the state name is shown.
hover = {
    column: column == 'State'
    for column in ('Year', 'State', 'Code', 'VotesA', 'VotesP', 'Population',
                   'PopPerVoteA', 'PopPerVoteP', 'Diff')
}
px.choropleth(
    data_frame=df_census,
    locations='Code',
    locationmode='USA-states',
    color='PopPerVoteP',
    color_continuous_scale="RdYlGn",
    hover_name='PopPerVoteP',
    hover_data=hover,
    animation_frame='Year',
    scope='usa',
    title="Population per Theoretical Electoral Vote",
)
But even the maps we just created make it hard to discern the difference between states.
Let's visualize the same data in a more efficient manner by normalizing it; to do this, let's divide each state's population per electoral vote by the lowest population per electoral vote for that 10 year period (across all states).
So in 1960, Alaska will have a value equal to 1 (since it has the lowest number for this metric), while California will have a value closer to 5.
# Normalise population-per-vote within each census year: divide every state's
# value by that year's smallest value, so the best-represented state scores 1
# and every other state shows how many times more people share one vote there.
# A group-wise minimum replaces the fourteen per-year minimum variables and the
# seven-branch if/elif chain, and works for whatever years df_census contains.
for allocation in ('A', 'P'):
    per_vote = df_census['PopPerVote' + allocation]
    # smallest value of the same census year, repeated on every row of that year
    year_minimum = per_vote.groupby(df_census['Year']).transform('min')
    df_census['PPV' + allocation + 'Mult'] = (per_vote / year_minimum).round(2)
df_census.head()
| Year | State | Population | Code | VotesA | VotesP | PopPerVoteA | PopPerVoteP | Diff | PPVAMult | PPVPMult | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1960 | Alabama | 3266740 | AL | 10 | 10 | 326674 | 326674 | 0 | 4 | 1 |
| 1 | 1960 | Alaska | 226167 | AK | 3 | 1 | 75389 | 226167 | 2 | 1 | 1 |
| 2 | 1960 | Arizona | 1302161 | AZ | 5 | 4 | 260432 | 325540 | 1 | 3 | 1 |
| 3 | 1960 | Arkansas | 1786272 | AR | 6 | 5 | 297712 | 357254 | 1 | 4 | 2 |
| 4 | 1960 | California | 15717204 | CA | 40 | 47 | 392930 | 334409 | -7 | 5 | 1 |
If you are having trouble with how to interpret this number, here is an example: In 1960, Alaska has a value of 1 and California has a value of 5.21. This means that each Alaskan vote is equal to 5.21 Californian votes. Alternatively, a person in Alaska has 5.21 times the voting power of a person in California.
# Hover box: the plotted value is the headline (hover_name); of the remaining
# columns only the state name is shown.
hover = {
    column: column == 'State'
    for column in ('Year', 'State', 'Code', 'VotesA', 'VotesP', 'Population',
                   'PopPerVoteA', 'PopPerVoteP', 'Diff', 'PPVAMult', 'PPVPMult')
}
px.choropleth(
    data_frame=df_census,
    locations='Code',
    locationmode='USA-states',
    color='PPVAMult',
    color_continuous_scale="Reds",
    hover_name='PPVAMult',
    hover_data=hover,
    animation_frame='Year',
    scope='usa',
    title="Voting Power of the Most Overrepresented State Relative to Other States",
)
Let's do the same thing for the votes that we allocated proportionally. Once again, states with smaller populations tend to have an advantage, but the differences are not as grotesque as in the current system.
# Same voting-power map as for the actual votes, but for the proportional
# allocation (PPVPMult). The title now says so; it used to be identical to the
# title of the actual-votes map, making the two charts indistinguishable.
# Hover box: the plotted value is the headline (hover_name); of the remaining
# columns only the state name is shown.
hover = {
    'Year': False,
    'State': True,
    'Code': False,
    'VotesA': False,
    'VotesP': False,
    'Population': False,
    'PopPerVoteA': False,
    'PopPerVoteP': False,
    'Diff': False,
    'PPVAMult': False,
    'PPVPMult': False,
}
px.choropleth(data_frame = df_census, locations = 'Code', locationmode = 'USA-states', color = 'PPVPMult',
              scope = 'usa', hover_name = 'PPVPMult', hover_data = hover,
              title = "Voting Power of the Most Overrepresented State Relative to Other States (Proportional Allocation)",
              animation_frame = 'Year', color_continuous_scale = "Reds")
Now that we know that the Electoral College distorts votes, let's see if that distortion may have affected the results of any recent presidential elections. The election we will look at is the 2000 election, where 537 votes in the state of Florida gave George W. Bush the edge over Al Gore.
To see if the electoral college helped swing the election, we will find how many electoral votes each candidate would have received had all votes been allocated proportionally. For this theoretical vote count, we will use the "theoretical" allocation of seats after the 1990 Census (since the 2000 Census was used to decide the allocation of seats for 2004 and 2008).
# Re-count the 2000 election with proportionally allocated votes: each state's
# winner receives the state's 'VotesP' from the 1990 Census allocation.
df_2000 = df_elec_col[df_elec_col['Year'] == 2000].reset_index(drop=True)
df_census_1990 = df_census[df_census['Year'] == 1990]

# Match votes to states by state code rather than by list position, so the
# result does not depend on both tables listing the states in the same order.
votes_by_code = df_census_1990.set_index('Code')['VotesP']
state_votes = df_2000['Code'].map(votes_by_code)
if state_votes.isna().any():
    missing = df_2000.loc[state_votes.isna(), 'Code'].tolist()
    raise KeyError("no 1990 proportional votes for: {}".format(missing))

# 'R' states count for Bush; every other state counts for Gore.
bush_states = df_2000['Party'] == 'R'
votes_Bush = int(state_votes[bush_states].sum())
votes_Gore = int(state_votes[~bush_states].sum())
# D.C. was removed from both tables; its single vote under the proportional
# scheme (538 - 537) is credited to Gore.
votes_Gore += 1

print('Bush:',votes_Bush)
print('Gore:',votes_Gore)
Bush: 260 Gore: 278
Turns out Gore would have won the election under this scenario.